In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import plotly.express as px
import plotly.graph_objects as go
In [3]:
sns.set(style="whitegrid")
In [4]:
df = sns.load_dataset("iris")

df.head()
Out[4]:
sepal_length sepal_width petal_length petal_width species
0 5.1 3.5 1.4 0.2 setosa
1 4.9 3.0 1.4 0.2 setosa
2 4.7 3.2 1.3 0.2 setosa
3 4.6 3.1 1.5 0.2 setosa
4 5.0 3.6 1.4 0.2 setosa
In [5]:
df.describe()
Out[5]:
sepal_length sepal_width petal_length petal_width
count 150.000000 150.000000 150.000000 150.000000
mean 5.843333 3.057333 3.758000 1.199333
std 0.828066 0.435866 1.765298 0.762238
min 4.300000 2.000000 1.000000 0.100000
25% 5.100000 2.800000 1.600000 0.300000
50% 5.800000 3.000000 4.350000 1.300000
75% 6.400000 3.300000 5.100000 1.800000
max 7.900000 4.400000 6.900000 2.500000
In [6]:
np_array = df.select_dtypes(include=[float]).values
np_array[:5]
Out[6]:
array([[5.1, 3.5, 1.4, 0.2],
       [4.9, 3. , 1.4, 0.2],
       [4.7, 3.2, 1.3, 0.2],
       [4.6, 3.1, 1.5, 0.2],
       [5. , 3.6, 1.4, 0.2]])
In [7]:
means = np.mean(np_array, axis=0)
stds = np.std(np_array, axis=0)
means, stds
Out[7]:
(array([5.84333333, 3.05733333, 3.758     , 1.19933333]),
 array([0.82530129, 0.43441097, 1.75940407, 0.75969263]))
In [8]:
plt.hist(df["sepal_length"])
plt.title("Distribution des longueurs de sépale")
plt.xlabel("Longueur")
plt.ylabel("Fréquence")
plt.show()
No description has been provided for this image
In [9]:
plt.plot(df["sepal_length"])
plt.title("Longueur de sépale")
plt.xlabel("Index")
plt.ylabel("Longueur")
plt.show()
No description has been provided for this image
In [10]:
sns.pairplot(df, hue="species")
plt.show()
No description has been provided for this image
In [11]:
plt.figure(figsize=(8,6))
sns.heatmap(df.corr(), annot=True, cmap="coolwarm")
plt.title("Correlation Heatmap")
plt.show()
---------------------------------------------------------------------------
ValueError                                Traceback (most recent call last)
Cell In[11], line 2
      1 plt.figure(figsize=(8,6))
----> 2 sns.heatmap(df.corr(), annot=True, cmap="coolwarm")
      3 plt.title("Correlation Heatmap")
      4 plt.show()

File ~\anaconda3\Lib\site-packages\pandas\core\frame.py:11049, in DataFrame.corr(self, method, min_periods, numeric_only)
  11047 cols = data.columns
  11048 idx = cols.copy()
> 11049 mat = data.to_numpy(dtype=float, na_value=np.nan, copy=False)
  11051 if method == "pearson":
  11052     correl = libalgos.nancorr(mat, minp=min_periods)

File ~\anaconda3\Lib\site-packages\pandas\core\frame.py:1993, in DataFrame.to_numpy(self, dtype, copy, na_value)
   1991 if dtype is not None:
   1992     dtype = np.dtype(dtype)
-> 1993 result = self._mgr.as_array(dtype=dtype, copy=copy, na_value=na_value)
   1994 if result.dtype is not dtype:
   1995     result = np.asarray(result, dtype=dtype)

File ~\anaconda3\Lib\site-packages\pandas\core\internals\managers.py:1694, in BlockManager.as_array(self, dtype, copy, na_value)
   1692         arr.flags.writeable = False
   1693 else:
-> 1694     arr = self._interleave(dtype=dtype, na_value=na_value)
   1695     # The underlying data was copied within _interleave, so no need
   1696     # to further copy if copy=True or setting na_value
   1698 if na_value is lib.no_default:

File ~\anaconda3\Lib\site-packages\pandas\core\internals\managers.py:1753, in BlockManager._interleave(self, dtype, na_value)
   1751     else:
   1752         arr = blk.get_values(dtype)
-> 1753     result[rl.indexer] = arr
   1754     itemmask[rl.indexer] = 1
   1756 if not itemmask.all():

ValueError: could not convert string to float: 'setosa'
<Figure size 800x600 with 0 Axes>
In [12]:
fig = px.scatter(
    df, x="sepal_length", y="sepal_width",
    color="species", title="Plotly Scatter – Iris"
)
fig.show()
In [13]:
fig = px.scatter_3d(
    df,
    x="sepal_length",
    y="sepal_width",
    z="petal_length",
    color="species",
    title="Iris – 3D Plot"
)
fig.show()
In [14]:
# Charger un dataset Kaggle ou un CSV local
titanic = sns.load_dataset("titanic")
titanic.head()
Out[14]:
survived pclass sex age sibsp parch fare embarked class who adult_male deck embark_town alive alone
0 0 3 male 22.0 1 0 7.2500 S Third man True NaN Southampton no False
1 1 1 female 38.0 1 0 71.2833 C First woman False C Cherbourg yes False
2 1 3 female 26.0 0 0 7.9250 S Third woman False NaN Southampton yes True
3 1 1 female 35.0 1 0 53.1000 S First woman False C Southampton yes False
4 0 3 male 35.0 0 0 8.0500 S Third man True NaN Southampton no True
In [15]:
titanic = titanic.dropna(subset=["age"])
In [16]:
sns.barplot(data=titanic, x="class", y="age", hue="sex")
plt.title("Âge moyen par classe – Titanic")
plt.show()
No description has been provided for this image
In [17]:
plt.boxplot(titanic["age"])
plt.title("Distribution âges – Titanic")
plt.show()
No description has been provided for this image
In [18]:
fig = px.sunburst(
    titanic,
    path=["class", "sex", "survived"],
    values="age",
    title="Sunburst – Titanic"
)
fig.show()
C:\Users\mee\anaconda3\Lib\site-packages\plotly\express\_core.py:1727: FutureWarning:

The default of observed=False is deprecated and will be changed to True in a future version of pandas. Pass observed=False to retain current behavior or observed=True to adopt the future default and silence this warning.

C:\Users\mee\anaconda3\Lib\site-packages\plotly\express\_core.py:1727: FutureWarning:

The default of observed=False is deprecated and will be changed to True in a future version of pandas. Pass observed=False to retain current behavior or observed=True to adopt the future default and silence this warning.

C:\Users\mee\anaconda3\Lib\site-packages\plotly\express\_core.py:1727: FutureWarning:

The default of observed=False is deprecated and will be changed to True in a future version of pandas. Pass observed=False to retain current behavior or observed=True to adopt the future default and silence this warning.

In [ ]: